** Table 4 **
* Builds the two panels of Table 4 and saves them as Table4a and Table4b
* in the folder named by $resultspath.
* NOTE(review): the three paths below are absolute, machine-specific paths;
* they must be edited before the script can run on another machine.
cd "D:\Dropbox\unequal_gains\QJE revision plan\analysis\"
* Folder holding the datasets read below; the intermediate file
* analysis_main.dta is also written here.
global Section4 "D:\Dropbox\unequal_gains\QJE revision plan\analysis\section4_data"
* Folder that receives the saved table datasets.
global resultspath "D:\Dropbox\unequal_gains\QJE revision plan\analysis\clean_results"

* 1) Panel A
* Loads the group-level file, then writes two small tables into the first
* rows of the data: distinct-value counts (rows 1-6) and the weighted
* mean / sd / interquartile range of each g_n_annual* variable (rows 1-8).
use "$Section4/age_educ_race_children_state_groups_final", clear

* --- sample sizes: one label per row, rows 1-6 ---
generate N = .
generate sample_descr = " "
local row = 0
foreach lbl in "All" "Age" "Education" "States" "Race" "Children" {
    local ++row
    replace sample_descr = "`lbl'" if _n == `row'
}

* id numbers every observed age_bin x education x statefip x race x children cell
egen id = group(age_bin education statefip race children)

* distinct counts, listed in the same row order as the labels above
local row = 0
foreach v in id age_bin education statefip race children {
    local ++row
    distinct `v'
    replace N = r(ndistinct) if _n == `row'
}

* --- distribution of shocks: one label per row, rows 1-8 ---
generate mean = .
generate sd = .
generate IQR = .

generate log_change_descr = " "
local row = 0
foreach lbl in "Raw" "linear age" "age fe" "educ fe" "state fe" "race fe" "children fe" "all fe" {
    local ++row
    replace log_change_descr = "`lbl'" if _n == `row'
}

* give the eight series numeric suffixes 1-8 so they can be looped over
rename g_n_annual g_n_annual_1
local k = 1
foreach sfx in linageres ageres educres stateres raceres childrenres allres {
    local ++k
    rename g_n_annual_`sfx' g_n_annual_`k'
}

* weighted moments; the analytic weight is the log of the average of P and P_initial
forvalues k = 1/8 {
    summarize g_n_annual_`k' [aw=log((P+P_initial)/2)], detail
    replace mean = r(mean) if _n == `k'
    replace sd = r(sd) if _n == `k'
    replace IQR = r(p75) - r(p25) if _n == `k'
}

* keep only the table columns and the eight rows that carry results
format mean sd IQR %8.3f
keep N sample_descr id mean sd IQR log_change_descr
keep if _n <= 8

save "$resultspath/Table4a", replace


* 2) Panel B

* Build the analysis file: outcomes merged with the instrument, restricted
* to rows where every variable listed in the sample flag is non-missing.
use "$Section4/outcomes_final.dta", clear

merge 1:1 product_module_code quality_rank using "$Section4/instrument_age_educ_race_children_state"
* discard rows that exist only in the instrument file
keep if _merge != 2
drop _merge

* log price indices (ln() and log() are the same function in Stata)
generate double RMS_ltornqvist_price_index_win = ln(RMS_tornqvist_price_index_win)
generate double HMS_ltornqvist_price_index_win = ln(tornqvist_price_index_win)
foreach idx in paasche laspeyres ces {
    generate double RMS_l`idx'_price_index_win = ln(RMS_`idx'_price_index_win)
}

* document analysis sample: flag is 1 only when none of these is missing
generate analysis_sample = !missing( ///
    inst_raw, ///
    RMS_avg_spending, ///
    RMS_avg_realspending_growth_pc, ///
    RMS_feenstra_win, ///
    RMS_avg_tornqvist_infl, ///
    RMS_avg_infl_full)

* share of rows retained: unweighted, then weighted by RMS_avg_spending
summarize analysis_sample, detail
summarize analysis_sample [aw=RMS_avg_spending], detail
distinct product_module_code if analysis_sample == 1

drop if analysis_sample != 1
save "$Section4/analysis_main.dta", replace

* summary statistics in product space
* Computes the weighted mean / sd / interquartile range of six versions of
* the instrument and stores them in rows 1-6, then saves those rows.
* Every summarize and regress below uses the analytic weight log(RMS_avg_spending).
use "$Section4/analysis_main.dta", clear

* keep only product modules that have exactly 10 rows
* (presumably one row per quality_rank value - TODO confirm)
bysort product_module_code: gen dup = _N
keep if dup == 10
drop dup

* number of product modules left, repeated on every row
distinct product_module_code
gen N = r(ndistinct)

* one specification label per row, rows 1-6
gen spec_descr = " "
local row = 0
foreach lbl in "Raw" "Age Linear" "Age Linear & price decile FE" "Age Linear & dept FE" "Age Linear & product group FE" "Age-Educ-State-Race-Children & product group - price decile FE" {
    local ++row
    replace spec_descr = "`lbl'" if _n == `row'
}

* prepare variables for table
* Each residualised series is (value - fitted value + weighted mean), so
* the fixed effects are partialled out while the overall level is kept.
sum inst_linageres [aw=log(RMS_avg_spending)], d
gen inst_linageres_mean = r(mean)

* inst_3: age linear & price decile (quality_rank) FE
* inst_4: age linear & dept FE
* inst_5: age linear & product group FE
local k = 2
foreach fe in quality_rank department_code product_group_code {
    local ++k
    reg inst_linageres i.`fe' [aw=log(RMS_avg_spending)]
    predict inst_hat
    gen inst_`k' = inst_linageres - inst_hat + inst_linageres_mean
    drop inst_hat
}

* inst_6: all-household-FE instrument with product group and quality rank FE
* NOTE(review): the row 6 label reads "product group - price decile FE", but
* the two factors enter additively here (no # interaction) - confirm which
* specification is intended before changing either the label or the model.
sum inst_allres [aw=log(RMS_avg_spending)], d
gen inst_allres_mean = r(mean)
reg inst_allres i.product_group_code i.quality_rank [aw=log(RMS_avg_spending)]
predict inst_hat
gen inst_6 = inst_allres - inst_hat + inst_allres_mean
drop inst_hat

* number the raw and linear-age series so all six can be looped over
rename inst_raw inst_1
rename inst_linageres inst_2

gen mean = .
gen sd = .
gen IQR = .

forvalues k = 1/6 {
    sum inst_`k' [aw=log(RMS_avg_spending)], d
    replace mean = r(mean) if _n == `k'
    replace sd = r(sd) if _n == `k'
    replace IQR = r(p75) - r(p25) if _n == `k'
}

* rows past the sixth still carry the blank placeholder label
drop if spec_descr == " "

* full variable name instead of the abbreviation "spec", so this does not
* depend on variable abbreviation being enabled and unambiguous
keep mean sd IQR spec_descr N
format mean sd IQR %8.4f

save "$resultspath/Table4b", replace


